/* written paper 1 fall 2007 */

/* Builds one dataset from the male and female earnings files, then works   */
/* through the questions of the paper: means and confidence intervals of    */
/* log earnings by gender, a test of equal means, schooling regressions by  */
/* gender, a pooled regression with gender-specific intercepts and slopes,  */
/* and a few alternative specifications.                                    */

/* clear lets the do-file be re-run while a dataset is already in memory */
use earningsdata_males, clear
gen gender = 1
append using earningsdata_females
/* rows that still have gender missing after the append are coded 2 (female) */
replace gender = 2 if gender==.
label define genderlbl 1 "male" 2 "female"
label values gender genderlbl

/* inspect the data */
desc
summ

/************************************************************************************************************/
/* question 1 */

/* The statistics below are stored as variables that are constant across  */
/* observations; disp on such a variable shows its first observation.     */
summ ln_y_ if gender==1
return list
gen mean_male = r(mean)
gen N_male = r(N)
/* standard error of the sample mean: sd / sqrt(N) */
gen sd_mean_male = r(sd)/sqrt(N_male)

summ ln_y_ if gender==2
return list
gen mean_female = r(mean)
gen N_female = r(N)
gen sd_mean_female = r(sd)/sqrt(N_female)

summ mean_* sd_* N_*

/* confidence intervals for the respective means */
/* 1.64 and 1.96 are the rounded normal critical values for 90% and 95% */
gen ci_mean_male_lower = mean_male - 1.64*sd_mean_male
gen ci_mean_male_upper = mean_male + 1.64*sd_mean_male
disp "90% confidence interval for male log earnings is [" ci_mean_male_lower "," ci_mean_male_upper "]"

gen ci_mean_male_lower5 = mean_male - 1.96*sd_mean_male
gen ci_mean_male_upper5 = mean_male + 1.96*sd_mean_male
disp "95% confidence interval for male log earnings is [" ci_mean_male_lower5 "," ci_mean_male_upper5 "]"

gen ci_mean_female_lower = mean_female - 1.64*sd_mean_female
gen ci_mean_female_upper = mean_female + 1.64*sd_mean_female
disp "90% confidence interval for female log earnings is [" ci_mean_female_lower "," ci_mean_female_upper "]"

gen ci_mean_female_lower5 = mean_female - 1.96*sd_mean_female
gen ci_mean_female_upper5 = mean_female + 1.96*sd_mean_female
disp "95% confidence interval for female log earnings is [" ci_mean_female_lower5 "," ci_mean_female_upper5 "]"

/* test whether the means are different */
gen diff = mean_male - mean_female
/* the two samples are treated as independent, so the variances of the means add */
gen se_diff = sqrt((sd_mean_male^2)+(sd_mean_female^2))
gen t = diff/se_diff
disp t

/* p-value of test of equal means (two-sided, normal approximation) */
gen p = 2*normal(-abs(t)) /* norm in stata8 */
disp p
/* the p-value is less than 0.01, so we reject the H0 that the means are equal at the 1% level */

/************************************************************************************************************/
/* question 2 */

egen mean_sample = mean(ln_y_)
/* temp = exp(mean of log earnings) */
gen temp = exp(mean_sample)
gen earnings = exp(ln_y_)
egen mean_earnings = mean(earnings)
/* mean earnings are not equal to exp(mean of ln earnings) since ln is not a linear operator */

/************************************************************************************************************/
/* question 3 */

reg ln_y_ s if gender==1
gen alpha_male = _b[_cons]
gen beta_male = _b[s]
gen se_male = _se[s]
ereturn list
gen r2_male = e(r2)
/* make graph */
twoway scatter ln_y_ s if gender==1 || lfit ln_y_ s if gender==1, legend(off)

reg ln_y_ s if gender==2
gen alpha_female = _b[_cons]
gen beta_female = _b[s]
gen se_female = _se[s]
ereturn list
gen r2_female = e(r2)
/* make graph */
twoway scatter ln_y_ s if gender==2 || lfit ln_y_ s if gender==2, legend(off)

/* OLS assumptions are: */
/* (1) E(ui|Xi)=0 */
/* (2) Xi, Yi for i = 1,...,n are iid draws from their joint distribution */
/* (3) Large outliers are unlikely: 0 < E(Xi^4) < infinity and 0 < E(Yi^4) < infinity */

/************************************************************************************************************/
/* NOTE(review): the text of this file between OLS assumption (3) above and */
/* the "> 1.96" rejection rule below was lost (it looks like everything     */
/* between a "<" and a later ">" was stripped). The commands that           */
/* originally created d_female and s_female, and the regression that        */
/* originally preceded the t-test, are not recoverable from this file. They */
/* are rebuilt here from the way the variables are used further down --     */
/* confirm against the original assignment.                                 */

/* gender dummies and gender-specific schooling terms */
capture confirm variable d_female
if _rc != 0 {
    gen d_female = gender==2
}
capture confirm variable s_female
if _rc != 0 {
    gen s_female = s*d_female
}
gen d_male = gender==1
gen s_male = s*d_male

/* pooled regression with separate intercepts and schooling slopes by gender; */
/* no common constant because d_male + d_female already spans it             */
reg ln_y_ d_male d_female s_male s_female, noconst

/* t-test on the female schooling coefficient: reject H0 at the 5% level if |t| > 1.96 */
/* _b[] and _se[] read from the regression just above, which contains s_female */
gen t_female = _b[s_female]/_se[s_female]
disp t_female
gen p_female = 2*normal(-abs(t_female)) /* norm in stata8 */
disp p_female

/* use an F-test for the two schooling coefficients being the same */
test s_female = s_male

/************************************************************************************************************/
/* question 8 */
/* other specifications */

/* only experience */
/* NOTE(review): e_2 is not created anywhere in the visible part of this file; */
/* presumably it comes with the dataset or was created in the lost text -- confirm */
reg ln_y_ e e_2 if gender==1

/* only private/public sector */
reg ln_y_ public servi if gender==1, noconst

/* only type of education */
reg ln_y_ unsp-serv if gender==1

/* only region */
reg ln_y_ ostf-finmark if gender==1